********************************************************************************
*** Merge Experian Data

use "data/cleaned/hugo/hourly collapsed action.dta", clear
*first merge crosswalk to hugo

preserve
	use "data/inputs/Experian/202007301A_Reseq_NoPII.dta" 
	keep uid record_nb 
	keep if uid != "UID"
	rename uid id
	tempfile crosswalk
	save `crosswalk'
restore

* merge hugo users to crosswalk and confirm they all have a record
merge 1:1 id using `crosswalk', gen(_m_crosswalk) assert(2 3)

*merge with experian
merge 1:1 record_nb using "data/inputs/Experian/202007301A_MEMBER_022019.dta", gen(_m_experian)

*no non-matches from experian
assert _m_experian != 2

*get rid of those who aren't actually in hugo
drop if _m_crosswalk == 2

********************************************************************************
*** Check Experian Data

*there is a crosswalk for everyone in the sample;
*create indcator for those with no report
gen gotcred = ""
replace gotcred = "No Report" if _m_experian == 1
replace gotcred = "Has Report" if _m_experian == 3

*label variables
label variable age "Age"
label variable car_year "Car Year"
label variable car_value "Vehicle Value"
label variable mileage "Car Mileage"
label variable totalagentpremium "3-Month Premium"
label variable makemodelyearvalue_private "MMY Private Value"
label variable female "Female"
label variable sr22 "SR-22"
label variable valid_license "Valid License"

********************************************************************************
*** credit report stats by hugo users and californians and americans


*add full Experian data
drop state_cd
gen state_cd = ""
append using "data/inputs/Experian/202007301A_SAMPLE_022019.dta", force

*there are 1,000,000 observations in the Experian data, corresponding to non-missing(states)

gen appliedhugo = 1
gen boundhugo = 1 if bound == 1
gen ussamp = 1 if !mi(state_cd)
gen casamp = 1 if state_cd == "CA"

label variable appliedhugo "Applied Hugo"
label variable boundhugo "Bound Hugo"
label variable ussamp "US Sample"
label variable casamp "CA Sample"


********************************************************************************
*** Prepare the Variables

*total inquiries
gen totinq = p13_iqz9420
replace totinq = 0 if p13_iqz9420 == 0 | p13_iqz9420 > 90 & !mi(p13_iqz9420)

*has auto loans
gen autoloan = 0 if p13_aua0300 == 0 | p13_aua0300 == 99
replace autoloan = 1 if p13_aua0300 > 0 & p13_aua0300 < 90

*auto loan amount
gen autoloanamount = p13_aua5020
replace autoloanamount = 0 if p13_aua5020 > 999999990 & !mi(p13_aua5020)

*credit card balance
gen cardbalance = p13_bcc5020
replace cardbalance = 0 if p13_bcc5020 > 999999990 & !mi(p13_bcc5020)

*credit card limit
gen cardlimit = p13_bcc5620
replace cardlimit = 0 if p13_bcc5620 > 999999990 & !mi(p13_bcc5620)

*total revolving trade limit
gen totalrevlimit = p13_rev5620
replace totalrevlimit = 0 if p13_rev5620 > 999999990 & !mi(p13_rev5620)

*nonmedical collections balance
gen nonmedcoll = p13_col5065
replace nonmedcoll = 0 if p13_col5065 > 999999990 & !mi(p13_col5065)

*medical collections balance
gen medcoll = p13_col5066
replace medcoll = 0 if p13_col5066 > 999999990 & !mi(p13_col5066)

*total debt in collections
gen totcoll = p13_col5060
replace totcoll = 0 if p13_col5060 > 999999990 & !mi(p13_col5060)

*amount past due on 30+ days delinquent
gen totdue30 = p13_all5740
replace totdue30 = 0 if p13_all5740 > 999999990 & !mi(p13_all5740)

*total debt past due
egen totaldebtpastdue = rowtotal(totcoll totdue30)

*ever bankruptcy?
gen everbank = 1 if p13_all9120 > 0 & !mi(p13_all9120)
replace everbank = 0 if p13_all9120 == 0

*30, 60, 90 days past due
	gen totdel30 = p13_all2420
	replace totdel30 = 0 if p13_all2420 > 90 & !mi(p13_all2420)
	gen totdel90 = p13_all2480
	replace totdel90 = 0 if p13_all2480 > 90 & !mi(p13_all2480)
	
	assert totdel30>=totdel90

*income insight
replace income_insight_score = . if income_insight_score > 9000

*vantage_v4_score
replace vantage_v4_score = . if vantage_v4_score > 9000

********************************************************************************
*** Start with experian utilization expclar for those in hugo
gen balcred = p13_rev7110
gen balcredraw = p13_rev7110

*some of the missing codes correspond to credit constraints
*(having no revolving credit, for example)
/*
999: No trade excluding collections
998: No revolving trade
997: No revolving trade reported within timeframe required
996: No open revolving trade reported within timeframe required
995: No open revolving trade reported within timeframe required with credit amount > $0
*/
*any of the above suggest a constraint on credit
replace balcred = 101 if balcred > 990 & balcred <= 999 & !mi(balcred)
replace balcred = 101 if balcred > 100 & !mi(balcred)

*create another variable for a tabulation
*first for sample cross tabulation
gen credsamp = ""
replace credsamp = "US Sample" if ussamp == 1
replace credsamp = "Hugo Users" if boundhugo == 1

*define those with no access to credit (either using more than 100% or 0 available)
gen isconstrained = 1 if balcred > 100 & !mi(balcred)
replace isconstrained = 0 if balcred <= 100 & !mi(balcred)

********************************************************************************
*** label things
label variable vantage_v4_score "Credit Score"
label variable balcredraw "Credit Utilization"
label variable dti1_score_total_debt "DTI1 Total Debt to Income Ratio"
label variable income_insight_score "Income Insight Score"
label variable totinq "Total Inquiries"
label variable autoloan "Has Auto Loan"
label variable autoloanamount "Auto Loan Amount"
label variable cardbalance "Credit Card Balance"
label variable cardlimit "Credit Card Limit"
label variable nonmedcoll "Non-Medical Collections"
label variable medcoll "Medical Collections"
label variable everbank "Ever Filed for Bankruptcy"
label variable totdel30 "Number of Debts 30 Days Delinquent"
label variable totdel90 "Number of Debts 90 Days Delinquent"
label variable totcoll "Total Debt in Collections"
label variable totaldebtpastdue "Total Debt Past Due"

drop p13*

*save big dataset with the 1,000,000 sample
save "data/int/hugo and experian.dta", replace

*save smaller dataset with just hugo users
drop if mi(cohort)
save "data/int/collapsed action with experian.dta", replace

beep
